***************************************************************************
*	CREATE BASIC VARIABLES
* 	Prior to running this program run 0.Data-Cleaning to create the file Union-data.dta
***************************************************************************
* ---------------------------------------------------------------------------
* Session setup: reset Stata, open a dated log, set display options, and
* create the working copy of the cleaned data that the rest of the file edits.
* ---------------------------------------------------------------------------
* Start from an empty session.
clear all
* Close a log left open by a previous run; -capture- suppresses the error
* raised when no log is open.
capture log close
* NOTE(review): hard-coded, machine-specific working directory.
cd C:\temp\Data&Methods\DataCleaning
* The log file name embeds today's date (c(current_date)); -replace- overwrites
* a log already written on the same day.
log using "analysis`c(current_date)'.log", replace

* NOTE(review): "set more 1" looks like the legacy numeric form of
* "set more off" -- confirm on the Stata version in use.
set more 1
* NOTE(review): "sol" is presumably a user-installed graph scheme; confirm it
* is available on the machine running this file.
set scheme sol
set autotabgraphs on  

* Load the cleaned data and save it under the working-file name, so that the
* final -save- of this do-file overwrites workingdata.dta, not Union-data.dta.
use Union-data.dta
save workingdata.dta, replace
* Global macro: "use $data" expands to "use workingdata.dta, clear".
macro define data "workingdata.dta, clear"


use $data

	* Remove the two raw profit accumulators before the renames below.
	drop TotalProfit totalProfitCycle

	* --- Rename raw fields to the lower-case names used from here on -------
	rename timeChoice  decision_time
	rename Period      period_session
	rename Session     session
	rename Group       group
	rename periodCycle period
	rename Subject     subject
	rename Treatment   treatment
	rename Profit      profit
	rename groupSize   groupsize

	* --- Variable labels ----------------------------------------------------
	* (A space now separates the variable name from the quoted label on the
	*  period_session line; the original had none.)
	label variable decision_time  "Decision time"
	label variable period_session "Period in Session"
	label variable session        "Session"
	label variable period         "Period in supergame"
	label variable treatment      "Treatment"
	label variable groupsize      "Economy Size"
	label variable ymd            "Session Date"
	label variable profit         "Points earned in the period"

	* --- Value labels for the treatment codes -------------------------------
	label define treatl 1 "Neutral" 2 "Converge" 3 "Diverge" 4 "Neutral+" 5 "Neutral-Chat"
	label values treatment treatl

*	The two outcome counts below must sum to the number of producers in the
*	economy (1, 6, 8 or 12 depending on the economy).
	label variable outcomePairY "Y outcomes in Economy"
	label variable outcomePairZ "Z outcomes in Economy"
	
	
*== VOTE variable
*	Raw country codes:  1 = RED (middle payoff), 2 = GREEN (low payoff), 3 = BLUE (high payoff)
*	Raw ballot OPT cast by country i:
*		1 = leave, 4 = stay,
*		2 = exclude country i+1 (wraps to RED when i+1 = 4),
*		3 = exclude country i-1 (wraps to BLUE when i-1 = 0)
*	Recoded VOTE: 1 = leave, 2 = green out, 3 = red out, 4 = blue out, 5 = keep all 24

	gen vote = .
	* Ballots that do not depend on the voter's country
	replace vote = 1 if opt == 1
	replace vote = 5 if opt == 4
	* Exclude the weak country (GREEN = 2)
	replace vote = 2 if (country == 1 & opt == 2) | (country == 3 & opt == 3)
	* Exclude the middle country (RED = 1)
	replace vote = 3 if (country == 3 & opt == 2) | (country == 2 & opt == 3)
	* Exclude the strong country (BLUE = 3)
	replace vote = 4 if (country == 1 & opt == 3) | (country == 2 & opt == 2)

	label define votel 1 "Leave" 2 "Out Disad." 3 "Out Middle"  4 "Out Advan."  5  "Stay"
	label values vote votel
	label variable vote "Vote for groups"

	* The raw ballot is no longer needed once vote exists.
	drop opt

*== Coarse VOTE variable: the "exclude" ballots (vote 2, 3, 4) are lumped together
*	1 = Leave, 2 = Exclude (any country), 3 = Stay; missing votes stay missing
	gen voteNEW = cond(inlist(vote, 3, 4), 2, cond(vote == 5, 3, vote))
	label define votenewl 1 "Leave"  2 "Exclude"   3 "Stay"
	label values voteNEW votenewl
	label variable voteNEW "Vote is 1,2 or 3"

*== One 0/1 indicator per kind of vote: vote1 = LEAVE, vote2 = EXCLUDE, vote3 = STAY
	local votekinds Leave Exclude Stay
	forvalues j = 1/3 {
		local kind : word `j' of `votekinds'
		gen vote`j' = (voteNEW == `j')
		label variable vote`j' "`kind'"
	}

*	Recode COUNTRY and the opponent's country so that codes rise with payoff.
*	Raw codes: 1 = RED (middle payoff), 2 = GREEN (low payoff), 3 = BLUE (high payoff)
*	New codes: 1 = Disadv. (was 2), 2 = Middle (was 1), 3 = Advan. (unchanged)
*	Single-pass form of "subtract 1 below 3, then send 0 to 2".
	foreach cvar of varlist country matchCountry {
		replace `cvar' = cond(`cvar' == 1, 2, `cvar' - 1) if `cvar' < 3
	}
	rename matchCountry country_o

	label define countryl 1 "Disadv." 2 "Middle" 3 "Advan."
	label values country   countryl
	label values country_o countryl
	label variable country   "Country"
	label variable country_o "Opponent's Country"
	
*	Payoff parameters and the beta threshold, by (recoded) country
*	k = 11, 13, 15 for country = 1, 2, 3
	gen k = 2*country + 9
	label variable k "Cooperation payoff -- fixed pairs  (isolated economies)"

*	a is decided in this priority order:
*		0 in fixed pairs (groupsize == 2);
*		1 for treatment 2 / country 3 and treatment 3 / country 1;
*		5 for treatment 4, treatment 2 / country 1 and treatment 3 / country 3;
*		3 otherwise.
	gen a = cond(groupsize == 2, 0, ///
	        cond((treatment == 2 & country == 3) | (treatment == 3 & country == 1), 1, ///
	        cond(treatment == 4 | (treatment == 2 & country == 1) | (treatment == 3 & country == 3), 5, 3)))
	label variable a "Gain in cooperation payoff -- mixed groups (integrated economies)"

	gen beta_star = 6/(a + k - 3)
	label variable beta_star "threshold beta for full coop"

*	TYPE: shift the raw code down by one so that 0 = consumer, 1 = producer
	replace type = type - 1
	label define typel 0 "Consumer"   1 "Producer"
	label values type typel
	label variable type "Role of subject"

*	CHOICES (own and opponent's): a raw 0 means "no choice to make" and becomes
*	missing; every positive code is shifted down by one, giving 0 = D, 1 = C.
	foreach cv of varlist choice otherChoice {
		replace `cv' = cond(`cv' == 0, ., `cv' - 1) if `cv' >= 0
	}
	rename otherChoice otherchoice

	label define choicel 0 "D"  1 "C"
	label values choice      choicel
	label values otherchoice choicel
	label variable choice      "Choice D=0  C=1"
	label variable otherchoice "Opponent's choice"

*	Opponent's choice split by economy size (supergames 1-4)
	gen otherchoice2  = otherchoice if groupsize == 2
	gen otherchoice12 = otherchoice if groupsize == 12

*	SUPERGAME index (copy of cycle) and its length in periods
	gen game = cycle
	drop cycle
	label variable game "Supergame"

	* duration = last period number observed in the session's supergame
	bysort treatment session game (period): gen duration = period[_N]
	label variable duration "Periods in game"

*	LAGGED DURATION: length of the previous supergame
*	(the minimum is 18 periods, so supergame 1 is assigned 18)
	* Step 1: on each supergame's period-1 row, read the duration stored on the
	*         row just before it, i.e. the last row of the previous supergame.
	bysort treatment session subject (game period): gen duration_lagged = duration[_n-1] if period == 1
	replace duration_lagged = 18 if game == 1
	* Step 2: copy the first-row value to every row of the same supergame.
	bysort treatment session subject game (period): replace duration_lagged = duration_lagged[1]
	label variable duration_lagged "Rounds in previous supergame"

*	TOTAL DURATION: number of rounds played in the PREVIOUS supergames
*	temp holds the supergame length on its period-1 row only, so the running
*	sum below counts each supergame once. Sorting on (game period) guarantees
*	that the period-1 row is the FIRST row of its supergame; with (game) alone
*	the within-game order is not defined, and rows placed before the period-1
*	row would receive a running sum that omits the current supergame.
	gen temp = duration if period == 1
	bysort treatment session subject (game period): gen tot_duration = sum(temp) - duration
	label variable tot_duration  "Total number of rounds played in PREVIOUS supergames"
	drop temp

*	SESSION TOTAL DURATION: number of rounds played in the ENTIRE session
*	(the largest session-level period number recorded for the subject)
	bysort treatment session subject (period_session): gen session_duration = period_session[_N]
	label variable session_duration  "Total number of rounds played in SESSION"
	
*	GROUP CONFIGURATION: 1 = fixed pair (two-person economy), 0 = mixed group
	gen partnership = cond(groupsize == 2, 1, 0)
	label variable partnership "Group Configuration"
	label define templ 0 "Mixed Group" 1 "Fixed Pair"
	label values partnership templ

*	ORDER of the session: 2-2-12-12 (order = 0) or the reverse (order = 1).
*	Read off the group size of the first row once the session is sorted by supergame.
	bysort treatment session (game subject period): gen order = (groupsize[1] == 12)
	label variable order "Order of session"
	label define orderl 0 "pairs first"  1 "large first"
	label values order orderl


*== Does the supergame-5 group match what the subject voted for?
	* size5 = group size on the subject's last row when sorted by supergame
	sort treatment session country subject game
	bysort treatment session country subject (game): gen size5 = groupsize[_N]
	label variable size5 "Group size in game 5 (this subject)"

	order treatment session game country subject groupsize partnership period type size5

	* 1 when the realised size matches the vote:
	*   Leave -> 2,  Exclude -> 16,  Stay -> 24;  0 otherwise
	gen desiredgroup = (voteNEW == 1 & size5 == 2)  | ///
	                   (voteNEW == 2 & size5 == 16) | ///
	                   (voteNEW == 3 & size5 == 24)
	label variable desiredgroup "Game 5 group = subject's choice"

	
***PAYMENTS***
*	TOTAL dollar earnings of a subject in a supergame: points summed over the
*	supergame's periods, converted at 0.18 dollars per point.
*	egen's total() is the documented function; sum() is its obsolete synonym.
	bysort treatment session game subject (period): egen dollar = total(profit)
	replace dollar = dollar*0.18
	label variable dollar "$$ earnings in game" 

*	DOLLAR PAYMENTS for a subject in the session
	* Quiz payoff rounded to the nearest quarter
	gen quiz = round(payoffCQ1, 0.25)

*	Supergame that was paid out in each session. One list per treatment; the
*	s-th entry is the paid supergame of session s (sessions 1-8).
	local paidgame1 "1 2 5 5 3 4 3 3"
	local paidgame2 "4 1 4 5 3 1 5 3"
	local paidgame3 "5 2 5 4 4 5 3 5"
	local paidgame4 "3 2 1 1 5 2 4 4"

*	paid = dollar earnings of the paid supergame, rounded UP to the next
*	quarter; it stays missing on the rows of all other supergames.
	gen paid = .
	forvalues t = 1/4 {
		forvalues s = 1/8 {
			local g : word `s' of `paidgame`t''
			replace paid = 0.25*ceil(dollar/0.25) if treatment == `t' & session == `s' & game == `g'
		}
	}

	label variable paid "Salient earnings" 
			
*	Individual characteristics from the pre-experiment quiz (10 questions)
	* Total response time over all ResponseTime* variables
	egen response_t = rowtotal(ResponseTime*)
	* Wrong answers as a fraction of the 10 questions
	gen wrong_ans = (10 - RightAnswer)/10
	drop ResponseTime* RightAnswer
		

***  Unique identifiers for SUBJECTS, ECONOMIES and SESSIONS in the experiment

	* Subject: one ID per (treatment, session, subject)
	egen ID = group(treatment session subject)
	label variable ID "Subject unique ID in Experiment"
	display ID[_N]

	* Economy: pack group / game / session / treatment into a single number,
	* then replace it by consecutive integers 1, 2, 3, ...
	gen groupID = 10000*(treatment - 1) + 1000*(session - 1) + 100*(game - 1) + group
	egen groupseq = group(groupID)
	replace groupID = groupseq
	drop groupseq
	label variable groupID "Economy unique ID in experiment"

	* Session: one ID per (treatment, session)
	egen sessionID = group(treatment session)
	label variable sessionID "Session unique ID in experiment"
	
**	Average cooperation & profit in Phase 1, by group size and supergame
**	(recall: order is 0 if 2-2-12-12 and 1 if 12-12-2-2)
	* Per subject and supergame: mean outcome in fixed pairs (FP), mean outcome
	* and mean profit in mixed groups (MG).
	bysort treatment sessionID ID game (period): egen avgcFP = mean(outcome) if groupsize == 2
	bysort treatment sessionID ID game (period): egen avgcMG = mean(outcome) if groupsize > 2
	bysort treatment sessionID ID game (period): egen avgpMG = mean(profit)  if groupsize > 2

	local stubs avgcFP avgcMG avgpMG

	* One variable per supergame: <stub>1 ... <stub>5
	foreach s of local stubs {
		separate `s', by(game)
	}

	* Spread each per-game value to all rows of the subject. Missing values sort
	* last, so the first row within ID carries the value whenever one exists.
	forvalues g = 1/5 {
		foreach s of local stubs {
			bysort ID (`s'`g'): replace `s'`g' = `s'`g'[1]
		}
	}

	* Phase 1 = supergames 1-4: overwrite each base variable with the row mean
	* of its first four per-game variables.
	foreach s of local stubs {
		egen phase1mean = rowmean(`s'1 `s'2 `s'3 `s'4)
		replace `s' = phase1mean
		drop phase1mean
	}
	label variable avgcFP "Subject's Average Cooperation in Fixed Pairs Phase 1"
	label variable avgcMG "Subject's Average Cooperation in Mixed Groups Phase 1"
	label variable avgpMG "Subject's Average Profit in Mixed Groups Phase 1"

	* Mean outcome per subject and supergame, regardless of group size
	bysort treatment sessionID ID game (period): egen avgc = mean(outcome)
	label variable avgc "Subject's Average Cooperation - by game"


**	Full cooperation in Fixed Pairs of Phase 1 (consider order of play 2-2-12-12 and 12-12-2-2)
*	Result: fullc = number of the two Phase-1 fixed-pair supergames in which the
*	subject's summed outcome equals the number of periods played.
	* Sum of outcome over the subject's rows in the supergame, fixed pairs only
	* (temp is missing on rows with groupsize != 2).
	bysort treat sessionID ID game (period): egen temp=total(outcome) if groupsize==2
	* 1 when the sum equals the supergame length; 0 otherwise, including the
	* rows where temp is missing.
	gen fullc=(temp==duration)
	drop temp
	* One variable per supergame; the statements below rely on fullc1-fullc5
	* being created (assumes game takes the values 1-5 -- TODO confirm).
	separate fullc, by(game)
	drop fullc5
	* order==0 (pairs first): fixed pairs are supergames 1 and 2. Missing values
	* sort last, so fullcK[1] is the subject's supergame-K value; copy it to all
	* of the subject's rows.
	by ID (fullc1), sort: gen temp=fullc1[1]  if order==0
	replace fullc1=temp if order==0
	drop temp
	by ID (fullc2), sort: gen temp=fullc2[1]  if order==0
	replace fullc2=temp if order==0
	drop temp
	
	* order==1 (large first): fixed pairs are supergames 3 and 4. Their values
	* are stored in fullc1 and fullc2, so these two always mean "first" and
	* "second" fixed-pair supergame.
	by ID (fullc3), sort: gen temp=fullc3[1]  if order==1
	replace fullc1=temp if order==1
	drop temp fullc3
	by ID (fullc4), sort: gen temp=fullc4[1]  if order==1
	replace fullc2=temp if order==1
	drop temp fullc4
		
	* Count of fully cooperative fixed-pair supergames
	replace fullc=fullc1+fullc2
	label variable fullc "# of Phase 1 Fixed Pairs with Full C"

	* Remaining variable labels
	label variable group "Economy"
	* NOTE(review): wrong_ans was divided by 10 when it was created, so it holds
	* a fraction while this label says "# of" -- confirm which is intended.
	label variable wrong_ans "# of wrong answers in Quiz"
	label variable response_t "Response time in Quiz"
	
*	===========================================================================
*	TABLES: summary statistics on supergame length, payments, sex and the
*	share of incorrect quiz answers; then save the working file
*	===========================================================================
	tabstat duration paid quiz sex wrong_ans, stat(mean semean min max) col(stat) f(%6.2f) long

	* One session-by-supergame table of mean earnings, payment and duration
	* for each of treatments 1-4
	forvalues t = 1/4 {
		table (session) (game) if inlist(treatment, `t'), statistic(mean  dollar paid duration)  nformat(%9.2f) totals(session)
	}

	* dollar and quiz were only needed for the tables above
	drop dollar quiz
	order treatment session game country subject ID period type groupID groupsize partnership

	save workingdata.dta, replace